This notebook shows the advantages of Diffusion t-SNE using, as an example, the dataset generated by Farrell et al. (2018) in the study "Single-cell reconstruction of developmental trajectories during zebrafish embryogenesis".
We show how multi-scale views of the input data can be generated using our method. In particular, small-scale to large-scale structures can be learned by varying the time step parameter of Diffusion t-SNE. Unlike varying the vanilla t-SNE perplexity parameter, this procedure results in an effective representation of the global data geometry even if the data lies around a non-linear, potentially curved manifold.
# First we load all the necessary packages and functions
# Setup cell: caps the thread count, imports the numeric/plotting stack,
# sets global matplotlib font sizes and makes the project modules importable.
import os, sys, mkl, time, pickle
nthreads = 20 # limit the number of threads for all operations
os.environ["OMP_NUM_THREADS"] = str(nthreads) # must be set before loading numpy
mkl.set_num_threads(nthreads)
import numpy as np
import pandas as pd
import multiprocessing as mp
# plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
# Global font sizes applied to every figure created after this point.
mpl.rcParams['figure.titlesize'] = 24
mpl.rcParams['axes.titlesize'] = 20
mpl.rcParams['axes.labelsize'] = 20
mpl.rcParams['xtick.labelsize'] = 20
mpl.rcParams['ytick.labelsize'] = 20
# Custom pyscripts
# NOTE(review): hard-coded absolute path to one user's checkout; it must be
# edited before the notebook can run on another machine.
maindir = '/home/lanhuong/Projects/ManifoldLearning/DiffusionTSNE'
# All relative paths used below ("data/...", "examples/...") resolve against
# this directory because it becomes the working directory here.
os.chdir(maindir)
sys.path.insert(0, maindir)
from diffusion_tsne import diffusion_tsne
# Wildcard imports: helpers used below without a module prefix (e.g. plot2D)
# come from one of these project modules -- TODO confirm which.
from plotting import *
from generate_data import *
from utils import *
from metrics import *
# IPython magics (not plain Python): reload modules before executing code.
%load_ext autoreload
%autoreload 2
# Machine epsilon of double precision floats.
MACHINE_EPSILON = np.finfo(np.double).eps
# Reload the two tables cached as pickles by the preprocessing cells:
# the per-cell metadata first, then the expression table.
cell_info, df = (
    pd.read_pickle(_pkl_path)
    for _pkl_path in (
        "data/farrell2018/farrell_cell_info.pkl",
        "data/farrell2018/farrell_all_samples.pkl",
    )
)
df.shape
# Twelve hex colors chosen to match the figures of Farrell et al.
cols = [
    '#cccccc', '#999999',
    '#cab2d6', '#6a3d9a',
    '#fdbf6f', '#ff7f00',
    '#fb9a99', '#e31a1c',
    '#b2df8a', '#33a02c',
    '#a6cee3', '#1f78b4',
]
We download all the data files available on the Broad Institute single cell data portal (log-in required) from: https://portals.broadinstitute.org/single_cell/study/SCP162/single-cell-reconstruction-of-developmental-trajectories-during-zebrafish-embryogenesis
%%bash
# Decompress the downloaded expression matrix in place; the resulting .txt
# lands next to the archive in ./data/farrell2018.
gunzip ./data/farrell2018/URD_Dropseq_Expression_Log2TPM.txt.gz
# The expression values in this file are already log2(TPM).
# The file is large (about 1.5GB), so only the first five rows are parsed
# here as a quick look at the layout.
filename = "./data/farrell2018/URD_Dropseq_Expression_Log2TPM.txt"
_peek_kw = dict(nrows=5, index_col=0)
dftest = pd.read_table(filename, **_peek_kw)
print(dftest.shape)
dftest.iloc[:, 1:5]
# Parse the complete table (first column becomes the index) and time it.
start = time.time()
df = pd.read_table(filename, index_col=0)
end = time.time()
_n_rows, _n_cols = df.shape
print("Read (%d x %d) table with pandas in %f sec.\n"
      % (_n_rows, _n_cols, end - start))
# Recorded run: Read (17239 x 38731) table with pandas in 2091.302009 sec.

# Work with the transposed table from here on and cache it as a pickle so
# the slow text parse does not have to be repeated.
df = df.T
df.to_pickle("data/farrell2018/farrell_all_samples.pkl")
df.iloc[:5, 1:5]
df.shape
# Per-cell metadata. The first data row is dropped.
# NOTE(review): presumably that row is a type/descriptor line of the portal
# export rather than a cell -- confirm against the file.
cell_info = pd.read_table(
    "./data/farrell2018/URD_Dropseq_Meta.txt", index_col=0
).iloc[1:]
cell_info.head()

# Eyeball the first ten labels of both indices, then check that they agree
# element-wise (the result is only displayed, not enforced).
for _index in (df.index, cell_info.index):
    print(_index[0:10])
np.all(df.index == cell_info.index)

# Number of cells per developmental stage.
pd.crosstab(index=cell_info["Stage"], columns="Count")
cell_info.to_pickle("data/farrell2018/farrell_cell_info.pkl")
from sklearn.utils.extmath import randomized_svd

# Column-center the expression matrix before the decomposition.
X = df.values
X_colmean = X.mean(axis=0)
X_cntr = X - X_colmean[None, :]

# Randomized truncated SVD keeping the leading 50 components.
# The seed is taken from the wall clock, so the factorization is not
# reproducible from one run of this cell to the next.
nPC = 50
seed = int(time.time())
U, s, VT = randomized_svd(X_cntr, n_components=nPC, random_state=seed)

# Principal-component scores: left singular vectors scaled by the
# singular values.
X50PCs = U.dot(np.diag(s))
X50PCs.shape
# Scree plot: squared singular values of the retained components, log scale.
plt.figure(figsize=(10, 7))
plt.bar(range(nPC), s**2, log=True)

# Percentage of variance carried by each component.
# Rounded to ONE decimal so the "%4.1f" axis labels below show real digits
# (rounding to integers made every label end in ".0").
# NOTE(review): the denominator sums only the nPC retained components, so
# these are shares of the retained variance, not of the total variance.
var_exp = np.round(100 * s**2 / np.sum(s**2), 1)

# Legend placed to the right of the axes; shared by both scatter plots.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)

# PC1 vs PC2, labelled by cell_info["Stage"].
plot2D(X50PCs[:, 0:2], label=cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
# Use the current axes: in current matplotlib a bare plt.axes() adds a new,
# empty full-window Axes on top of the plot instead of returning this one.
plt.gca().set_aspect(aspect=1)
plt.xlabel("PC1 [%4.1f %%]" % var_exp[0])
plt.ylabel("PC2 [%4.1f %%]" % var_exp[1])

# PC2 vs PC3, same styling.
plot2D(X50PCs[:, 1:3], label=cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.gca().set_aspect(aspect=1)
plt.xlabel("PC2 [%4.1f %%]" % var_exp[1])
plt.ylabel("PC3 [%4.1f %%]" % var_exp[2])
import umap

# Matching UMAP's neighbourhood size to a t-SNE perplexity of 100:
# our FIt-SNE implementation uses entropy = np.log(perplexity), whereas
# umap uses entropy = np.log2(n_neigh). Equating the two,
#   np.log(100) = np.log2(n_neigh)  ==>  n_neigh = 2**np.log(100)
# (so n_neigh of about 10-15 corresponds to perplexity of about 30-50).
H = np.log(100)
n_neigh = int(2**H)
print("n_neigh used: %d" % n_neigh)

# Fit and embed in one step, timing the whole call.
start = time.time()
umap_p100 = umap.UMAP(n_neighbors=n_neigh)
Y_umap_p100_spec_init = umap_p100.fit_transform(X50PCs)
end = time.time()
print('UMAP embedding in %f sec' % (end - start))
# Recorded run: UMAP embedding in 442.310949 sec

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_umap_p100_spec_init, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
import umap

# UMAP neighbourhood size matched to a t-SNE perplexity of 1000 through the
# entropy relation np.log(perplexity) = np.log2(n_neigh).
H = np.log(1000)
n_neigh = int(2**H)
print("n_neigh used: %d" % n_neigh)

# Fit the model (fit returns the estimator itself) and read the embedding
# from the fitted object; the whole step is timed.
start = time.time()
umap_p1000 = umap.UMAP(n_neighbors=n_neigh).fit(X50PCs)
Y_umap_p1000_spec_init = umap_p1000.embedding_
end = time.time()
print('UMAP embedding in %f sec' % (end - start))
# Vanilla t-SNE at perplexity 50 (scale_probs off, no time_steps argument).
_tsne_kw = dict(perplexity=50, seed=42, scale_probs=False,
                nthreads=nthreads, load_affinities="save")
start = time.time()
Y_tsne_p50 = diffusion_tsne(X50PCs, **_tsne_kw)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Per-point diagnostics read back as flat float64 ('d') arrays.
# NOTE(review): presumably these files are written by the call above
# because of load_affinities="save" -- confirm in diffusion_tsne.
_f64 = np.dtype('d')
betas50 = np.fromfile('betas.dat', dtype=_f64)
degrees50 = np.fromfile('affinity_rowsums.dat', dtype=_f64)
meandist50 = np.fromfile('mean_dists.dat', dtype=_f64)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_tsne_p50, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm.
_inv_two_beta = 1/(2*betas50)
plot2D(Y_tsne_p50, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
# Vanilla t-SNE at perplexity 100 (scale_probs off, no time_steps argument).
# Author's note for this run: 0.007 frac nnz.
_tsne_kw = dict(perplexity=100, seed=42, scale_probs=False,
                nthreads=nthreads, load_affinities="save")
start = time.time()
Y_tsne_p100 = diffusion_tsne(X50PCs, **_tsne_kw)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Per-point diagnostics read back as flat float64 ('d') arrays.
# NOTE(review): presumably these files are written by the call above
# because of load_affinities="save" -- confirm in diffusion_tsne.
_f64 = np.dtype('d')
betas100 = np.fromfile('betas.dat', dtype=_f64)
degrees100 = np.fromfile('affinity_rowsums.dat', dtype=_f64)
meandist100 = np.fromfile('mean_dists.dat', dtype=_f64)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_tsne_p100, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm
# (alternative noted by the author: mpl.colors.PowerNorm(gamma=1./2.)).
_inv_two_beta = 1/(2*betas100)
plot2D(Y_tsne_p100, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
# Vanilla t-SNE at perplexity 300 (scale_probs off, no time_steps argument).
# Author's note for this run: 0.03 frac nnz.
_tsne_kw = dict(perplexity=300, seed=42, scale_probs=False,
                nthreads=nthreads, load_affinities="save")
start = time.time()
Y_tsne_p300 = diffusion_tsne(X50PCs, **_tsne_kw)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Per-point diagnostics read back as flat float64 ('d') arrays.
# NOTE(review): presumably these files are written by the call above
# because of load_affinities="save" -- confirm in diffusion_tsne.
_f64 = np.dtype('d')
betas300 = np.fromfile('betas.dat', dtype=_f64)
degrees300 = np.fromfile('affinity_rowsums.dat', dtype=_f64)
meandist300 = np.fromfile('mean_dists.dat', dtype=_f64)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_tsne_p300, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm
# (alternative noted by the author: mpl.colors.PowerNorm(gamma=1./2.)).
_inv_two_beta = 1/(2*betas300)
plot2D(Y_tsne_p300, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
# Vanilla t-SNE at perplexity 500 (scale_probs off, no time_steps argument).
_tsne_kw = dict(perplexity=500, seed=42, scale_probs=False,
                nthreads=nthreads, load_affinities="save")
start = time.time()
Y_tsne_p500 = diffusion_tsne(X50PCs, **_tsne_kw)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Per-point diagnostics read back as flat float64 ('d') arrays.
# NOTE(review): presumably these files are written by the call above
# because of load_affinities="save" -- confirm in diffusion_tsne.
_f64 = np.dtype('d')
betas500 = np.fromfile('betas.dat', dtype=_f64)
degrees500 = np.fromfile('affinity_rowsums.dat', dtype=_f64)
meandist500 = np.fromfile('mean_dists.dat', dtype=_f64)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_tsne_p500, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm
# (alternative noted by the author: mpl.colors.PowerNorm(gamma=1./2.)).
_inv_two_beta = 1/(2*betas500)
plot2D(Y_tsne_p500, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
# Vanilla t-SNE at perplexity 1000 (scale_probs off, no time_steps argument).
_tsne_kw = dict(perplexity=1000, seed=42, scale_probs=False,
                nthreads=nthreads, load_affinities="save")
start = time.time()
Y_tsne_p1000 = diffusion_tsne(X50PCs, **_tsne_kw)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Per-point diagnostics read back as flat float64 ('d') arrays.
# NOTE(review): presumably these files are written by the call above
# because of load_affinities="save" -- confirm in diffusion_tsne.
_f64 = np.dtype('d')
betas1000 = np.fromfile('betas.dat', dtype=_f64)
degrees1000 = np.fromfile('affinity_rowsums.dat', dtype=_f64)
meandist1000 = np.fromfile('mean_dists.dat', dtype=_f64)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_tsne_p1000, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm
# (alternative noted by the author: mpl.colors.PowerNorm(gamma=1./2.)).
_inv_two_beta = 1/(2*betas1000)
plot2D(Y_tsne_p1000, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
import pickle

# Bundle every vanilla t-SNE embedding with the betas array read back after
# the corresponding run, keyed by perplexity, and persist the bundle.
vanilla_tsne_res = {
    'Y_tsne_p50': Y_tsne_p50, 'betas50': betas50,
    'Y_tsne_p100': Y_tsne_p100, 'betas100': betas100,
    'Y_tsne_p300': Y_tsne_p300, 'betas300': betas300,
    # Fixed: this key used to hold Y_tsne_p1000, so the perplexity-500
    # embedding was never saved and the perplexity-1000 one was stored twice.
    'Y_tsne_p500': Y_tsne_p500, 'betas500': betas500,
    'Y_tsne_p1000': Y_tsne_p1000, 'betas1000': betas1000,
}
with open('examples/Farrell2018/farrell_vanilla_tsne_res.pkl', 'wb') as handle:
    pickle.dump(vanilla_tsne_res, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Scaled t-SNE (scale_probs=True) at perplexity 50. Unlike the other runs,
# no load_affinities argument is passed here.
_scale_kw = dict(perplexity=50, seed=42, scale_probs=True, nthreads=nthreads)
start = time.time()
Y_scaletsne_p50 = diffusion_tsne(X50PCs, **_scale_kw)
end = time.time()
print('Scale t-SNE embedding in %f sec' % (end - start))

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaletsne_p50, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm;
# betas50 is the array loaded after the vanilla perplexity-50 run.
_inv_two_beta = 1/(2*betas50)
plot2D(Y_scaletsne_p50, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
We note that, as expected, there is little difference between vanilla t-SNE and scaled t-SNE: the local variances do not differ much across regions and the bandwidth parameters are roughly the same, indicating that standard t-SNE does not distort the data by much.
# Scaled t-SNE (scale_probs=True) at perplexity 100. Unlike the other runs,
# no load_affinities argument is passed here.
_scale_kw = dict(perplexity=100, seed=42, scale_probs=True, nthreads=nthreads)
start = time.time()
Y_scaletsne_p100 = diffusion_tsne(X50PCs, **_scale_kw)
end = time.time()
print('Scale t-SNE embedding in %f sec' % (end - start))

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaletsne_p100, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm;
# betas100 is the array loaded after the vanilla perplexity-100 run.
_inv_two_beta = 1/(2*betas100)
plot2D(Y_scaletsne_p100, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
# Scaled t-SNE (scale_probs=True) at perplexity 500.
_scale_kw = dict(perplexity=500, seed=42, scale_probs=True,
                 nthreads=nthreads, load_affinities="save")
start = time.time()
Y_scaletsne_p500 = diffusion_tsne(X50PCs, **_scale_kw)
end = time.time()
print('Scale t-SNE embedding in %f sec' % (end - start))

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaletsne_p500, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm;
# betas500 is the array loaded after the vanilla perplexity-500 run.
_inv_two_beta = 1/(2*betas500)
plot2D(Y_scaletsne_p500, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
# Scaled t-SNE (scale_probs=True) at perplexity 1000.
_scale_kw = dict(perplexity=1000, seed=42, scale_probs=True,
                 nthreads=nthreads, load_affinities="save")
start = time.time()
Y_scaletsne_p1000 = diffusion_tsne(X50PCs, **_scale_kw)
end = time.time()
print('Scale t-SNE embedding in %f sec' % (end - start))

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaletsne_p1000, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')

# Same embedding labelled by 1/(2*beta) under a logarithmic color norm;
# betas1000 is the array loaded after the vanilla perplexity-1000 run.
_inv_two_beta = 1/(2*betas1000)
plot2D(Y_scaletsne_p1000, label=_inv_two_beta, s=25, figsize=(12, 10),
       norm=mpl.colors.LogNorm())
plt.axis('equal')
import pickle

# Collect the scaled t-SNE embeddings together with the betas arrays for the
# same perplexities, then write the bundle to disk.
scale_tsne_res = dict(
    Y_scaletsne_p50=Y_scaletsne_p50, betas50=betas50,
    Y_scaletsne_p100=Y_scaletsne_p100, betas100=betas100,
    Y_scaletsne_p500=Y_scaletsne_p500, betas500=betas500,
    Y_scaletsne_p1000=Y_scaletsne_p1000, betas1000=betas1000,
)
with open('examples/Farrell2018/farrell_scale_tsne_res.pkl', 'wb') as handle:
    pickle.dump(scale_tsne_res, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Random row subsample of the PC scores. 12000 indices are drawn WITH
# replacement and then de-duplicated, so the subsample usually has fewer
# than 12000 rows; np.unique also returns the indices sorted. The draw uses
# the global NumPy random state and is not seeded here.
_n_rows = X50PCs.shape[0]
idx = np.unique(np.random.randint(0, _n_rows, 12000))
X50PCs_sub = X50PCs[idx]
X50PCs_sub.shape
# Diffusion t-SNE: perplexity 50, time_steps 10, unscaled probabilities.
_affinity_dir = "./data/farrell2018/perp50_step10_affinities/"
_run_kw = dict(perplexity=50, seed=42, scale_probs=False,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_difftsne_p50_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Diffusion t-SNE embedding in %f min' % _minutes)
# Recorded run: Diffusion t-SNE embedding in 22.115429 min

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_difftsne_p50_t10, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Diffusion t-SNE: perplexity 200, time_steps 10, unscaled probabilities.
# Fixed: the affinities directory was named "perp100_step10_affinities"
# although this run uses perplexity=200, which mislabelled the saved files.
# Every other run names its directory after its own perplexity.
_affinity_dir = "./data/farrell2018/perp200_step10_affinities/"
_run_kw = dict(perplexity=200, seed=42, scale_probs=False,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_difftsne_p200_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
print('Diffusion t-SNE embedding in %f min' % ((end - start) / 60))
# NOTE(review): the timing previously quoted here (22.115429 min) is
# digit-for-digit the one quoted for other perplexities, so it is likely
# copy-pasted rather than measured for this run.

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_difftsne_p200_t10, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Diffusion t-SNE: perplexity 300, time_steps 10, unscaled probabilities.
_affinity_dir = "./data/farrell2018/perp300_step10_affinities/"
_run_kw = dict(perplexity=300, seed=42, scale_probs=False,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_difftsne_p300_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Diffusion t-SNE embedding in %f min' % _minutes)
# Quoted timing: Diffusion t-SNE embedding in 22.115429 min
# NOTE(review): identical to the figure quoted for other perplexities --
# possibly copy-pasted; re-measure before relying on it.

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_difftsne_p300_t10, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Diffusion t-SNE: perplexity 500, time_steps 10, unscaled probabilities.
_affinity_dir = "./data/farrell2018/perp500_step10_affinities/"
_run_kw = dict(perplexity=500, seed=42, scale_probs=False,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_difftsne_p500_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_difftsne_p500_t10, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Diffusion t-SNE: perplexity 1000, time_steps 10, unscaled probabilities.
_affinity_dir = "./data/farrell2018/perp1000_step10_affinities/"
_run_kw = dict(perplexity=1000, seed=42, scale_probs=False,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_difftsne_p1000_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Diffusion t-SNE embedding in %f min' % _minutes)
# Quoted timing: Diffusion t-SNE embedding in 22.115429 min
# NOTE(review): identical to the figure quoted for other perplexities --
# possibly copy-pasted; re-measure before relying on it.

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_difftsne_p1000_t10, cell_info["Stage"],
       col_map=cols, s=6, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
import pickle

# Bundle the unscaled diffusion t-SNE embeddings (time_steps=10) by
# perplexity and persist them.
difftsne_res = {
    'Y_difftsne_p50_t10': Y_difftsne_p50_t10,
    'Y_difftsne_p200_t10': Y_difftsne_p200_t10,
    'Y_difftsne_p300_t10': Y_difftsne_p300_t10,
    'Y_difftsne_p500_t10': Y_difftsne_p500_t10,
    'Y_difftsne_p1000_t10': Y_difftsne_p1000_t10,
}
# Written under examples/Farrell2018/ like the other result pickles of this
# notebook; previously this one file was dropped into the working directory.
# NOTE(review): update any loader that still expects the old location.
with open('examples/Farrell2018/farrell_difftsne_res.pkl', 'wb') as handle:
    pickle.dump(difftsne_res, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Scaled diffusion t-SNE: perplexity 50, time_steps 10, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp50_step10_affinities/"
_run_kw = dict(perplexity=50, seed=42, scale_probs=True,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p50_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p50_t10, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 100, time_steps 20, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp100_step20_affinities/"
_run_kw = dict(perplexity=100, seed=42, scale_probs=True,
               time_steps=20, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p100_t20 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p100_t20, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 300, time_steps 10, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp300_step10_affinities/"
_run_kw = dict(perplexity=300, seed=42, scale_probs=True,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p300_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p300_t10, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 300, time_steps 50, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp300_step50_affinities/"
_run_kw = dict(perplexity=300, seed=42, scale_probs=True,
               time_steps=50, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p300_t50 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p300_t50, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 500, time_steps 5, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp500_step5_affinities/"
_run_kw = dict(perplexity=500, seed=42, scale_probs=True,
               time_steps=5, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p500_t5 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p500_t5, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 500, time_steps 10, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp500_step10_affinities/"
_run_kw = dict(perplexity=500, seed=42, scale_probs=True,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p500_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p500_t10, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 500, time_steps 20, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp500_step20_affinities/"
_run_kw = dict(perplexity=500, seed=42, scale_probs=True,
               time_steps=20, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p500_t20 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p500_t20, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
# Scaled diffusion t-SNE: perplexity 1000, time_steps 10, scale_probs on.
_affinity_dir = "./data/farrell2018/scale_perp1000_step10_affinities/"
_run_kw = dict(perplexity=1000, seed=42, scale_probs=True,
               time_steps=10, nthreads=nthreads, load_affinities="save",
               save_files=True, affinities_dir=_affinity_dir)
start = time.time()
Y_scaled_difftsne_p1000_t10 = diffusion_tsne(X50PCs, **_run_kw)
end = time.time()
_minutes = (end - start) / 60
print('Scaled Diffusion t-SNE embedding in %f min' % _minutes)

# Embedding labelled by cell_info["Stage"], legend right of the axes.
_legend_kw = dict(
    fontsize=16, markerscale=6,
    loc='upper center', bbox_to_anchor=(1.2, 0.85),
    shadow=True, ncol=1)
plot2D(Y_scaled_difftsne_p1000_t10, cell_info["Stage"],
       col_map=cols, s=10, figsize=(11, 11))
plt.legend(**_legend_kw)
plt.axis('equal')
import pickle

# Gather every scaled diffusion t-SNE embedding, keyed by its perplexity
# (pNNN) and time-step (tNN) setting, and write the bundle to disk.
scale_difftsne_res = dict(
    Y_scaled_difftsne_p50_t10=Y_scaled_difftsne_p50_t10,
    Y_scaled_difftsne_p100_t20=Y_scaled_difftsne_p100_t20,
    Y_scaled_difftsne_p300_t10=Y_scaled_difftsne_p300_t10,
    Y_scaled_difftsne_p300_t50=Y_scaled_difftsne_p300_t50,
    Y_scaled_difftsne_p500_t5=Y_scaled_difftsne_p500_t5,
    Y_scaled_difftsne_p500_t10=Y_scaled_difftsne_p500_t10,
    Y_scaled_difftsne_p500_t20=Y_scaled_difftsne_p500_t20,
    Y_scaled_difftsne_p1000_t10=Y_scaled_difftsne_p1000_t10,
)
with open('examples/Farrell2018/farrell_scale_difftsne_res.pkl', 'wb') as handle:
    pickle.dump(scale_difftsne_res, handle, protocol=pickle.HIGHEST_PROTOCOL)